Note: 1) data was downloaded from this paper https://www.cell.com/cell-metabolism/fulltext/S1550-4131(22)00394-1?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS1550413122003941%3Fshowall%3Dtrue#sectitle0030

  1. qs file was downloaded from GEO: GSE183290 https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE183288

  2. this script is simply executed in the docker. docker run –rm -p 8787:8787 -e USER=jyu -e PASSWORD=jyu -e ROOT=TRUE -v /home/agschlitzer/sciebo/Projects2023/Theodorou_ILC2_ownerJYu:/home/jyu/rstudio jiangyanyu/jyu_r4.1.2:1.0

1 general steps

knitr::opts_chunk$set(warning=FALSE, messgae=FALSE, fig.path='Figs/', results = "hide")
## fig.width=4, fig.height=4

1.1 libraries

library(Seurat)
## Attaching SeuratObject
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.10.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##   genomic data. Bioinformatics 2016.
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(data.table)
remotes::install_version("DescTools",version = "0.99.44")
## Downloading package from url: https://packagemanager.rstudio.com/cran/__linux__/focal/latest/src/contrib/Archive/DescTools/DescTools_0.99.44.tar.gz
## Installing package into '/usr/local/lib/R/site-library'
## (as 'lib' is unspecified)
library(DescTools)
## 
## Attaching package: 'DescTools'
## The following object is masked from 'package:data.table':
## 
##     %like%
library(magrittr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(ggplot2)
install.packages("qs")
## Installing package into '/usr/local/lib/R/site-library'
## (as 'lib' is unspecified)
library(qs)
## qs 0.25.7

2 working directory

working_dir = "/home/jyu/rstudio/"

3 load yang published dataset

yang_data = qread(file = paste0(working_dir,"/GSE183288_Single_cell_atlas.qs"))

yang_data

DimPlot(yang_data,raster = TRUE,label = TRUE)
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`

4 extract ILC2/T/NK

yang_ilc2tnk = subset(yang_data,idents = c("NK","T","ILC"))
rm(yang_data)
gc()

DimPlot(yang_ilc2tnk,raster = TRUE)

# saveRDS(yang_ilc2tnk,file = paste0(working_dir,"/manuscript/data/yang_ilc2tnk_20230616.rds"))

4.1 re-do seurat analysis

## select only vWAT cells

yang_ilc2tnk_vwat = subset(yang_ilc2tnk,subset=tissue=="vWAT")

rm(yang_ilc2tnk)
gc()

## redo umap and clustering analysis

yang_ilc2tnk_vwat = NormalizeData(yang_ilc2tnk_vwat)
yang_ilc2tnk_vwat = FindVariableFeatures(yang_ilc2tnk_vwat,selection.method = "vst",nfeatures = 2000)
yang_ilc2tnk_vwat = ScaleData(yang_ilc2tnk_vwat)
## Centering and scaling data matrix
yang_ilc2tnk_vwat = RunPCA(yang_ilc2tnk_vwat,features = VariableFeatures(yang_ilc2tnk_vwat))
## PC_ 1 
## Positive:  Rora, Il1rl1, Il2ra, Gadd45b, Hilpda, Furin, Nfkb1, Nfkbia, Ltb4r1, Nfkbiz 
##     Il7r, S100a4, Areg, Ramp3, Pim1, Tmem64, Cxcr6, Ramp1, Cish, Ptpn13 
##     Fosb, Samsn1, Nrip1, Tmem176a, Tmem176b, Ell2, Itgav, Gata3, Dot1l, Nr4a1 
## Negative:  Ccl5, Nkg7, Fcer1g, Tyrobp, Gzma, Irf8, Klrd1, Ms4a4b, AW112010, Ugcg 
##     Klre1, Prf1, Ctsw, Serpinb9, Klrb1c, Txk, Klrk1, Dusp2, Ccl4, Ncr1 
##     Klra8, Anxa2, Sell, Serpinb6b, Klrc2, Klra9, Eomes, Plek, Gzmb, Klra4 
## PC_ 2 
## Positive:  Il1rl1, Ctla2a, Nfkbiz, Nr4a1, Gadd45b, Nfkb1, Gata3, Zfp36, Areg, Rnf128 
##     Arg1, Fosb, Hs3st1, Tyrobp, Id2, Csf2, Slc7a8, Nfkbia, Ccdc184, Irf8 
##     Fcer1g, Gm42031, Gzma, Sytl3, Alox5, Ptgir, Kdm6b, Hilpda, Klrg1, Tent5a 
## Negative:  Cd3d, Cd3g, Cd3e, Cd163l1, Icos, Tcrg-C1, Actn2, Blk, Trdc, Trbc2 
##     Ckb, Ms4a6b, Thy1, Aqp3, Trac, Grap2, Kcnk1, Lsp1, Jaml, Gpr183 
##     Tcf7, Gramd3, Ly6g5b, Igf1r, Avpi1, S1pr1, Bcl11b, Mmp25, Il17re, Rbpms2 
## PC_ 3 
## Positive:  Cd163l1, Trdc, Tcrg-C1, Blk, Tmem176a, Tmem176b, Ckb, Actn2, Kcnk1, Igf1r 
##     Aqp3, Selenop, Ly6g5b, Mmp25, F2r, Rbpms2, Trdv4, S100a6, Plxnd1, Ramp1 
##     Il18r1, Avpi1, Serpinb1a, Lgals1, Il17re, Jaml, Tyrobp, Capg, Psap, Ppp1r14c 
## Negative:  Trac, Cd28, Cd6, Gramd3, Cd4, Ifi27l2a, Cd5, Ccr7, Gm8369, Dusp10 
##     Slamf6, Ms4a6b, Themis, S1pr1, Trbc2, Cd8b1, Rps19, Nsg2, Actn1, Rflnb 
##     Ubash3a, Cd3d, Fam169b, Trat1, Inpp4b, Rpsa, Dapl1, Lef1, Slfn1, Tdrp 
## PC_ 4 
## Positive:  Lmo4, Satb1, Hs3st1, Slc7a8, Ccr7, Lpcat2, Pard3, Rnf128, Arg1, Dach2 
##     Ccdc184, Alox5, Emb, Gm43445, Kit, Klf5, Rflnb, Actn1, Dapl1, Ptgir 
##     Csf2, Tanc2, Tmem176a, Il7r, Bcl2, Fosb, Tmem176b, Nsg2, Eepd1, mt-Nd2 
## Negative:  Tnfrsf4, Foxp3, Ctla4, Hopx, Frmd5, Crip1, Actb, Rgs16, Ikzf2, S100a6 
##     Lgals1, Cd52, Cd5, Tox2, Bcl2a1b, Actg1, S100a4, Trac, AA467197, Il10 
##     S100a10, Vim, Ttc39c, Lgals3, Glrx, Stx11, Tnfrsf9, AW112010, Cd6, S100a11 
## PC_ 5 
## Positive:  Gm42418, Foxp3, Frmd5, Lars2, Tnfrsf4, Ikzf2, Ctla4, Neat1, Psap, Il10 
##     Zbtb20, Pcyt1a, Rgs16, Zeb2, Cpm, Bcl3, Neb, Dst, Itgav, Ccr1 
##     Tox2, Zfp36, Cma1, Egln3, Fos, Ttc39c, Dusp1, Gzma, Ikzf4, Slco2b1 
## Negative:  Rpl32, Rps2, Rplp0, Rpl29, Rpl10, Rpl15, Rpsa, Rps20, Rpl10a, Rpl41 
##     Rpl28, Rps19, Rpl36a, Rps28, Gapdh, Npm1, Ubb, Cd52, Psme2, Hspa8 
##     Cd7, Ptma, Eif5a, Xcl1, Hsp90ab1, Ldha, Tagln2, Nme1, Cd160, Nme2
yang_ilc2tnk_vwat = FindNeighbors(yang_ilc2tnk_vwat,dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
yang_ilc2tnk_vwat = FindClusters(yang_ilc2tnk_vwat,resolution = 0.5)

yang_ilc2tnk_vwat = RunUMAP(yang_ilc2tnk_vwat,n.components = 3,n.neighbors = 30,dims = 1:50,min.dist = 0.1)
## 10:59:28 UMAP embedding parameters a = 1.577 b = 0.8951
## 10:59:28 Read 15945 rows and found 50 numeric columns
## 10:59:28 Using Annoy for neighbor search, n_neighbors = 30
## 10:59:28 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 10:59:31 Writing NN index file to temp file /tmp/RtmpVGN8xs/file7eb77cc5a28
## 10:59:31 Searching Annoy index using 1 thread, search_k = 3000
## 10:59:38 Annoy recall = 100%
## 10:59:38 Commencing smooth kNN distance calibration using 1 thread
## 10:59:39 Initializing from normalized Laplacian + noise
## 10:59:40 Commencing optimization for 200 epochs, with 721904 positive edges
## 10:59:49 Optimization finished
# yang_ilc2tnk_vwat = readRDS(file = paste0(working_dir,"/yang_ilc2tnk_vwat_20230712.rds"))

# DimPlot(yang_ilc2tnk_vwat,group.by = "seurat_clusters",label = TRUE)
DimPlot(yang_ilc2tnk_vwat,group.by = "seurat_clusters",label = TRUE,dims = c(1,3))

# DimPlot(yang_ilc2tnk_vwat,group.by = "seurat_clusters",label = TRUE,dims = c(2,3))

# DimPlot(yang_ilc2tnk_vwat,group.by = "cell_state_label",label = TRUE)
DimPlot(yang_ilc2tnk_vwat,group.by = "cell_state_label",label = TRUE,dims = c(1,3))

# DimPlot(yang_ilc2tnk_vwat,group.by = "cell_state_label",label = TRUE,dims = c(2,3))

4.2 DEG heatmap

Idents(yang_ilc2tnk_vwat) = "seurat_clusters"
# yang_ilc2tnk_vwat_deg = FindAllMarkers(yang_ilc2tnk_vwat)

# write.csv(yang_ilc2tnk_vwat_deg,file = paste0(working_dir,"/yang_ilc2tnk_vwat_deg.csv"))

yang_ilc2tnk_vwat_deg = read.csv(file = paste0(working_dir,"/yang_ilc2tnk_vwat_deg.csv"))

# saveRDS(yang_ilc2tnk_vwat,file = paste0(working_dir,"/yang_ilc2tnk_vwat_20230712.rds"))

top10 = yang_ilc2tnk_vwat_deg %>%
  group_by(cluster) %>%
  top_n(n=10,wt=avg_log2FC)

tmp_seu = yang_ilc2tnk_vwat@meta.data
tmp_seu$cell = rownames(tmp_seu)
tmp_seu = tmp_seu %>% group_by(seurat_clusters) %>% sample_n(40) 
tmp_seu = yang_ilc2tnk_vwat[,tmp_seu$cell]


DoHeatmap(tmp_seu,features = top10$gene,raster = TRUE)

rm(top10,tmp_seu)

4.3 rename clusters

Idents(yang_ilc2tnk_vwat) = "seurat_clusters"
yang_ilc2tnk_vwat = RenameIdents(yang_ilc2tnk_vwat,
                                 c("0" = "0:NKcell",
                                   "1" = "1:Tcell",
                                   "2" = "2:ILC2",
                                   "3" = "3:NK-T",
                                   "4" = "4:ILCP",
                                   "5" = "5:NKcell",
                                   "6" = "6:Tregcell",
                                   "7" = "7:Tcell",
                                   "8" = "8:Tcell",
                                   "9" = "9:inte-ILCP",
                                   "10" = "10:nd",
                                   "11" = "11:nd",
                                   "12" = "12:nd",
                                   "13" = "13:nd"))
yang_ilc2tnk_vwat$celltype1 = Idents(yang_ilc2tnk_vwat)
Idents(yang_ilc2tnk_vwat) = "celltype1"

4.4 T, NK markers

FeaturePlot(yang_ilc2tnk_vwat,features = "Cd3e",dims = c(1,3),order = TRUE)

FeaturePlot(yang_ilc2tnk_vwat,features = "Cd4",dims = c(1,3),order = TRUE)

FeaturePlot(yang_ilc2tnk_vwat,features = "Cd8a",dims = c(1,3),order = TRUE)

FeaturePlot(yang_ilc2tnk_vwat,features = "Klrb1c",dims = c(1,3),order = TRUE)

FeaturePlot(yang_ilc2tnk_vwat,features = "Foxp3",dims = c(1,3),order = TRUE)

DotPlot(yang_ilc2tnk_vwat,features = c("Cd3e","Cd4","Cd8a","Klrb1c","Foxp3"))+
  coord_flip()+
  theme(axis.text.x = element_text(
    angle = 90,hjust = 1,vjust = 0.5
  ))

# FeaturePlot(yang_ilc2tnk_vwat,features = "Roc",dims = c(1,3),order = TRUE)

4.5 ILC markers

tmp_genes = c("Il7r","Il18r1","Tbx21","Ikzf3","Eomes","Kir2dl3","Gata3","Maf","Ptgdr2","Hpgds","Rorc","Il23r","Il1r1","Kit")

DotPlot(yang_ilc2tnk_vwat,features = tmp_genes)+
  coord_flip()+
  theme(axis.text.x = element_text(
    angle = 90,hjust = 1,vjust = 0.5
  ))

5 extract ILC2 cells

5.1 re-do seurat analysis

Idents(yang_ilc2tnk_vwat) = "seurat_clusters"

yang_vwat_ilc2 = subset(yang_ilc2tnk_vwat,idents=c(4,9,2))
yang_vwat_ilc2$tnkilc2_cluster = yang_vwat_ilc2$seurat_clusters

# DimPlot(yang_vwat_ilc2,label = TRUE,split.by = "intervention_group",ncol=2)
# 
yang_vwat_ilc2 = NormalizeData(yang_vwat_ilc2)
yang_vwat_ilc2 = FindVariableFeatures(yang_vwat_ilc2,selection.method = "vst",nfeatures = 2000)
yang_vwat_ilc2 = ScaleData(yang_vwat_ilc2)
## Centering and scaling data matrix
yang_vwat_ilc2 = RunPCA(yang_vwat_ilc2,features = VariableFeatures(yang_vwat_ilc2))
## PC_ 1 
## Positive:  Il1rl1, Nfkb1, Ctla2a, Gadd45b, Nr4a1, Nfkbiz, Gata3, Nfkbia, Neurl3, Hilpda 
##     Tspan13, Furin, Fosb, Zfp36, Rnf128, Jund, Areg, Tent5a, Arg1, Gm43305 
##     Ptpn13, Hs3st1, Ccdc184, Nfkbid, Klrg1, Slc7a8, Bhlhe40, Gm42031, Junb, Csf2 
## Negative:  Trdc, Cd163l1, Tcrg-C1, Cd3e, Cd3g, Cd3d, S100a6, Actn2, Il18r1, Blk 
##     Ckb, Tmem176a, Cd164, Slc16a6, Lsp1, Tmem176b, Cd247, F2r, Igf1r, Aqp3 
##     Lgals1, Gpx1, Capg, Jaml, Mmp25, Icos, S100a11, Ifngr1, S100a4, Ly6g5b 
## PC_ 2 
## Positive:  Bcl2a1b, Nr4a3, Actg1, Kdm6b, Trac, Bcl2a1d, Sub1, H3f3b, Vps37b, Tnfrsf4 
##     Dgat1, Ell2, Ubash3a, Cd40lg, Crem, Nabp1, Cd2, Rel, Cd4, Cd28 
##     Ramp3, Dot1l, Neurl3, Kdm2b, Odc1, Cd5, Hif1a, Hilpda, Dhx40, H2afz 
## Negative:  Fosb, Fos, Enah, Klf6, Stc2, Lpcat2, Ppp1r15a, Ighm, Dach2, Dusp1 
##     2410006H16Rik, mt-Atp6, Dyrk3, mt-Co3, mt-Co2, Rnf128, Zfp36, Cd24a, Txnip, Gm26802 
##     Olfr1259, Plod2, Zfp1, Mns1, Filip1l, Ttc28, Jun, Sptbn1, Gm43445, Cd27 
## PC_ 3 
## Positive:  Cd28, Trac, Cd2, Cd40lg, Cd4, Cd6, Lck, Cd5, Ms4a4b, Ubash3a 
##     Tnfrsf4, Gramd3, Ifi27l2a, Trat1, Bcl2a1b, Ccl5, Il21r, Fyb, Fasl, Nkg7 
##     Themis, AW112010, Ms4a6b, Tnfsf8, Got1, Tbc1d4, Slamf6, Trbc2, Itgb1, Ly6c2 
## Negative:  Lmo4, Il2ra, Irs2, Hs3st1, Sdc4, Furin, Tmem176a, Tmem176b, Gpr65, Zfp36l1 
##     Rab27b, Id2, Gm43305, Pdcd1, Alox5, Cdkn1a, Fam129a, Cebpb, Psap, Selenop 
##     Calca, Ets2, Lpcat2, Igsf5, Gm42031, Itgav, Klf5, Igf1r, Kit, Hilpda 
## PC_ 4 
## Positive:  Vps37b, Satb1, Ramp3, Crem, Gm42418, Lmo4, Cebpb, Icos, Calca, Ddx3y 
##     Kctd12, Gramd3, Eprs, Lars2, Jmy, Dgat1, Hs3st1, Kdm2b, Dusp10, Ms4a4b 
##     Homer1, mt-Nd2, Cwc25, Igf1r, Bcl2l11, Aplp2, Hes1, Ubc, Tnfrsf1b, Gm19585 
## Negative:  S100a10, Tmsb4x, Vim, Crip1, S100a4, Lgals1, Arg1, Rgcc, S100a11, S100a6 
##     Ubb, Rbpj, Cd44, Ramp1, Csf2, Klf6, Ctla2a, Ahnak, Txn1, Ucp2 
##     Cntn1, Tnfsf8, Cd52, Plscr1, AA467197, Tnfsf4, Tnfsf11, Alcam, Bace2, Gm20627 
## PC_ 5 
## Positive:  Malat1, Zfp36l2, Klf6, Pdcd4, mt-Atp8, Bcl2, Ccl5, Slc38a2, mt-Co1, Ahnak 
##     Txnip, AY036118, Wnk1, Klf9, Nkg7, mt-Nd3, Nr1d1, Cd69, Fosb, Cdk6 
##     Rab27a, Ppp1r15a, Fyb, Klrd1, Fcer1g, Peli1, Lck, mt-Nd2, Pgm2l1, mt-Nd1 
## Negative:  Tmsb4x, Cstb, Fth1, Hilpda, Dgat1, Odc1, Trdc, Actg1, Sub1, Ramp3 
##     S100a4, Pxdc1, Cd52, Krt83, H3f3b, H2afz, Bcl2a1b, Crem, Ramp1, Actn2 
##     Rplp0, Rps2, S100a11, Avpi1, Cwc25, S100a6, Kdm2b, Cebpb, Tnfrsf9, Csf2
yang_vwat_ilc2 = FindNeighbors(yang_vwat_ilc2,dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
yang_vwat_ilc2 = FindClusters(yang_vwat_ilc2,resolution = 0.5)

yang_vwat_ilc2 = RunUMAP(yang_vwat_ilc2,dims = 1:50,
                         n.components = 3,
                         min.dist = 1,
                         a = 0.01,
                         b = 1)
## 11:00:07 Read 3379 rows and found 50 numeric columns
## 11:00:07 Using Annoy for neighbor search, n_neighbors = 30
## 11:00:07 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:00:07 Writing NN index file to temp file /tmp/RtmpVGN8xs/file7eb72bdc14
## 11:00:07 Searching Annoy index using 1 thread, search_k = 3000
## 11:00:08 Annoy recall = 100%
## 11:00:09 Commencing smooth kNN distance calibration using 1 thread
## 11:00:10 Initializing from normalized Laplacian + noise
## 11:00:10 Commencing optimization for 500 epochs, with 145712 positive edges
## 11:00:15 Optimization finished
DimPlot(yang_vwat_ilc2,group.by = "tnkilc2_cluster",label = TRUE)

DimPlot(yang_vwat_ilc2,group.by = "seurat_clusters",label = TRUE)

# DimPlot(yang_vwat_ilc2,group.by = "seurat_clusters",label = TRUE,dims = c(1,3))
# DimPlot(yang_vwat_ilc2,group.by = "seurat_clusters",label = TRUE,dims = c(2,3))

5.2 umap split by condition

DimPlot(yang_vwat_ilc2,group.by = "seurat_clusters",label = TRUE,split.by = "intervention_group",ncol=2)

5.3 DEG

yang_vwat_ilc2_deg = FindAllMarkers(yang_vwat_ilc2)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
write.csv(yang_vwat_ilc2_deg,file = paste0(working_dir,"/manuscript/data/yang_vwat_ilc2_deg.csv"))
# yang_ilc2tnk_vwat_deg = read.csv(file = paste0(working_dir,"/manuscript/data/yang_ilc2tnk_vwat.deg.csv"))
top10 = yang_vwat_ilc2_deg %>%
  subset(avg_log2FC >1) %>%
  group_by(cluster) %>%
  top_n(n=30,wt=avg_log2FC)


DoHeatmap(yang_vwat_ilc2,features = top10$gene)

6 session infor

saveRDS(yang_vwat_ilc2,file = paste0(working_dir,"/manuscript/data/yang_vwat_ilc2.rds"))
sessionInfo()